# _*_ coding: utf-8 _*_
"""
@ 时间    ：2024/10/25 15:08
@ 作者    ：旺财
@ 文件    ：09 过采样与欠采样.py
@ 说明    ：   
"""
# 11.7.1 过采样
import pandas as pd
from collections import Counter
from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.under_sampling import RandomUnderSampler

# Load the dataset from the Excel workbook. The '分类' column is used as the
# target `y`; every remaining column is used as a feature in `X`.
data = pd.read_excel("信用卡数据.xlsx")
# A bare `data.head()` is only displayed inside a notebook; print it so the
# preview is also visible when this file is run as a script.
print(data.head())

X = data.drop(columns='分类')
y = data['分类']

# Class counts before any resampling, as the baseline for the counts printed
# after each sampler below (printed explicitly for the same reason as above).
print(Counter(y))

# (1) Random oversampling

# random_state is fixed so repeated runs produce the same resampled set.
ros = RandomOverSampler(random_state=0)
X_oversampled, y_oversampled = ros.fit_resample(X, y)

# Class counts and feature-matrix shape after random oversampling.
print(Counter(y_oversampled))

print(X_oversampled.shape)

# (2) SMOTE oversampling
smote = SMOTE(random_state=0)
X_smotesampled, y_smotesampled = smote.fit_resample(X, y)

# Class counts after SMOTE.
print(Counter(y_smotesampled))

# 11.7.2 Undersampling
rus = RandomUnderSampler(random_state=0)
X_undersampled, y_undersampled = rus.fit_resample(X, y)

# Class counts and feature-matrix shape after random undersampling.
print(Counter(y_undersampled))

print(X_undersampled.shape)
